


# Load rGertrude
source('https://trimweb.rba.gov.au/record/5368799')

# If that doesn't work you can install the full RBAverse.
#source("https://trimweb.rba.gov.au/record/6319351")
#rbaverse::install()

# Load SAFFI
#source('https://trimweb.rba.gov.au/record/6639794') 



#options(error=recover)
options(error=NULL, warn=0)

#install.packages(c('dplyr', 'tidyr', 'rGertrude', 'zoo', 'SAFFI2R'))
#library(Rtools)
library(dplyr)
#library(readxl)
library(tidyr)
library(rGertrude)
library(zoo)
library(SAFFI2R)
library(data.table)




# The sample covers the March Quarter 2008 to June Quarter 2018, for the ADIs that 
#   have submitted the quarterly RRF320.8 form to APRA for at least the past three years.


sample_banks <- c('AMP',
                  'ANZ',
                  'AUS',
                  'AWB',
                  'BBL',
                  'BQL',
                  'CBA',
                  'CPS',
                  'CTI',
                  'CUA',
                  'DBL',
                  'GBL',
                  'HBA',
                  'HBL',
                  'IMB',
                  'ING',
                  'MBL',
                  'MEB',
                  'MEC',
                  'MET',
                  'MSB',
                  'NAB',
                  'NPB',
                  'PBL',
                  'PFS',
                  'PNL',
                  'QML',
                  'QTM',
                  'TMB',
                  'VTL',
                  'WBC')

#########################################################################################
# SAFFI data

read_format_saffi <- function(remove_extra_variables) {
  
  # Pre-written series names
  saf_names <- read.csv('SAFFI_itemcode_names.csv')
  
  # Where multiple forms/queries are required for the same variable, i.e. due to the 
  #   fomr changes, sometimes both queries will have the same item code, sometimes not. 
  #   Where they do, the forms are joint together in an initial step. The others are
  #   joint subsequently.
  
  # These forms  require combining straight away because there are multiple items with 
  #   the same code.
  # Capital requires two forms depending on whether banks use IRB ('advanced') or 
  #   standardised ('non advanced') capital approaches.
  saf_cap_adv <- saffi2_query(query_id=17357)
  saf_cap_nonadv <- saffi2_query(query_id=17346)
  # There were some issues with one bank's reporting in March 2008.
  # See AFS' capital spreadhseet "REMOVED"
  #   to see the workings for what the actual numbers should be. 
  adjusted_bank <- "REMOVED"
  saf_march08capital <- saffi2_query(query_id=17354)
  saf_march08RWA <- saffi2_query(query_id=17353)
  saf_cap_adv[saf_cap_adv$period == '2008-03-31' & 
                saf_cap_adv$institution_code == adjusted_bank, 
              c('CS02011', 'CS02025', 'CS17744')] <- 
    c(saf_march08capital[, c('CS02011', 'CS02025')], sum(saf_march08RWA[, -(1:2)]))
  
  # Check for overlaps before joining
  if(any(saf_cap_adv$institution_code %in% saf_cap_nonadv$institution_code)) {
    warning('The advanced and nonadvanced capital queries are not mutually exclusive')
  }
  saf_cap <- rbind(saf_cap_adv, saf_cap_nonadv) 

  # Commercial loans have different forms for different states, using the same item codes.
  # This is the only query with multiple forms.
  saf_com_raw <- saffi2_query(query_id=17341) 
  saf_com <- 
    data.frame(saf_com_raw[, 1:2], 
               rowSums(saf_com_raw[, grepl('BSAO13063', colnames(saf_com_raw))]), 
               rowSums(saf_com_raw[, grepl('BSAO13064', colnames(saf_com_raw))]))
  colnames(saf_com) <- c('period', 'institution_code', 'BSAO13063', 'BSAO13064')
  
  # Load the other queries.
  saf_mortg <- saffi2_query(query_id=17342)
  saf_balsht_new720.A_form <- saffi2_query(query_id=17355)
  saf_balsht_old320.0_form <- saffi2_query(query_id=17356)
  saf_bfs <- saffi2_query(query_id=17349)
  saf_intinc <- saffi2_query(query_id=19496) 
  
  # From here, the datasets can just be combined with full_join. The saf_names file 
  #   indicates were two item codes need to be summed into one variable (usually because
  #   of a form change).
  saf_all <- full_join(saf_mortg, 
                       saf_balsht_new720.A_form, by=c('period', 'institution_code')) %>%
    full_join(saf_balsht_old320.0_form, by=c('period', 'institution_code')) %>% 
    full_join(saf_cap, by=c('period', 'institution_code')) %>%
    full_join(saf_com, by=c('period', 'institution_code')) %>% 
    full_join(saf_bfs, by=c('period', 'institution_code')) %>%
    full_join(saf_intinc, by=c('period', 'institution_code'))
  # Pull the variable names from the saf_names object
  names <- as.character(saf_names$name[match(colnames(saf_all), saf_names$Form_Item)])
  names[1:2] <- c('period', 'bank')
  colnames(saf_all) <- names
  
  # sum the columns that need to be summed
  saf_all$PRE_Deposits <- 
    rowSums(saf_all[, c('SUM1_PRE_Deposits', 'SUM2_PRE_Deposits')], na.rm=TRUE)
  saf_all <- saf_all %>%
    mutate(Assets = ifelse(!is.na(POST_Assets) & POST_Assets > 0, 
                           POST_Assets, PRE_Assets)) %>%
    mutate(Liabilities = ifelse(!is.na(POST_Liabilities) & POST_Liabilities > 0, 
                                POST_Liabilities, PRE_Liabilities)) %>%
    mutate(Deposits = ifelse(!is.na(POST_Deposits) & POST_Deposits > 0, 
                             POST_Deposits, PRE_Deposits)) %>%
    mutate(Cash = ifelse(!is.na(POST_Cash) & POST_Cash > 0, 
                         POST_Cash, PRE_Cash)) %>%
    mutate(RBAbalance = ifelse(!is.na(POST_RBAbalance) & POST_RBAbalance > 0, 
                               POST_RBAbalance, PRE_RBAbalance)) %>%
    arrange(period, bank)
  # Remove the variables that have been replaced by sums
  saf_all <- saf_all[, !substr(colnames(saf_all), 1, 3) %in% c('SUM', 'PRE', 'POS')]
  # Remove the non-quarter dates
  saf_all <- saf_all[substr(saf_all$period, 6, 7) %in% c('03', '06', '09', '12'), ]
  saf_all <- data.frame(saf_all)
  
  # Dealing with merger activivity:
  # 1) Bendigo (BBL) and Adelaide Bank (ADL).
  #   - 2009-03 and prior, sum ADL to BBL and remove ADL
  # 2) Commonwealth (CBA) and Bankwest (BWA)
  #   - 2012-09 and prior, sum BWA to CBA and remove BWA.
  # 3) Westpac (WBC) and St george (STG)
  #   - 2009-12 and prior, sum STG to WBC and remove STG
  
  # All the 'numeric' columns can just be summed across the two entities. First set
  #   NA values for the entity to be removed to zero. 
  num_cols_B <- !colnames(saf_all) %in% c('period', 'bank')
  
  rmv_banks <- c('ADL', 'BWA', 'STG')
  saf_all[saf_all$bank %in% rmv_banks, ][
    is.na(saf_all[saf_all$bank %in% rmv_banks, ])] <- 0
  
  saf_all[saf_all$bank == 'BBL' & saf_all$period <= '2009-03-31', num_cols_B] <-
    saf_all[saf_all$bank == 'BBL' & saf_all$period <= '2009-03-31', num_cols_B] +
    saf_all[saf_all$bank == 'ADL' & saf_all$period <= '2009-03-31', num_cols_B]

  saf_all[saf_all$bank == 'CBA' & saf_all$period <= '2012-09-30', num_cols_B] <-
    saf_all[saf_all$bank == 'CBA' & saf_all$period <= '2012-09-30', num_cols_B] + 
    saf_all[saf_all$bank == 'BWA' & saf_all$period <= '2012-09-30', num_cols_B]

  saf_all[saf_all$bank == 'WBC' & saf_all$period <= '2009-12-30', num_cols_B] <-
    saf_all[saf_all$bank == 'WBC' & saf_all$period <= '2009-12-30', num_cols_B] + 
    saf_all[saf_all$bank == 'STG' & saf_all$period <= '2009-12-30', num_cols_B]

  saf_all <- saf_all[!saf_all$bank %in% rmv_banks, ]
  saf_all$period <- as.character(saf_all$period)
  
  if(remove_extra_variables) {
    saf_all <- saf_all[, substr(colnames(saf_all), 1, 7) != 'CREDIT.']
  }
  return(saf_all)
}

# Build the SAFFI panel, dropping the columns whose names start with 'CREDIT.'.
saffi_D <- read_format_saffi(remove_extra_variables=TRUE)

# Optional cache: uncomment the first line to save the formatted SAFFI data, or
#   the second to reload it from disk instead of re-running the queries.
#write.csv(saffi_D, 'formatted_saffi_data_no_macro_data.csv')
#saffi_D <- read.csv('formatted_saffi_data_no_macro_data.csv')

###############################################################################
# Macro controls data

# Identify sample dates: the distinct SAFFI periods, sorted, as character strings.
dates <- as.character(sort(unique(saffi_D$period)))

# Pull data from GERTRUDE
read_macro_vars_F <- function(dates, end_date) {
  # end_date format: 'YYYY-MM-DD'
  dates <- dates[1:which(dates == end_date)]
  # - TWI
  TWI_dly <- gertrude_timeseries('AUM20000000.000')
  # Identify quarter ends by being in a quarter-end month, and being the last
  #   observation in that month.
  quarter_ends_B <- substr(index(TWI_dly), 6, 7) %in% 
    c('03', '06', '09', '12') &
    c(head(substr(index(TWI_dly), 9, 10), -1) >
        tail(substr(index(TWI_dly), 9, 10), -1), FALSE)
  TWI_q <- as.numeric(TWI_dly[quarter_ends_B])
  # Convert to percentage changes
  TWI_qpd <- 100*(tail(TWI_q, -1)/head(TWI_q, -1) - 1)
  # Align with appropriate dates
  kp <- substr(index(TWI_dly)[quarter_ends_B][-1], 1, 7) %in% 
    substr(dates, 1, 7)
  macro_D <- data.frame(period=dates, TWI_qpd=TWI_qpd[kp])
  # - GDP
  GDP_nfch_qpd <- gertrude_timeseries('AUN1SN30100.100', end_date=end_date)
  GDP_sach_qpd <- gertrude_timeseries('AUN1SN30100.1S0', end_date=end_date)
  macro_D <- data.frame(macro_D, 
                        GDP_qpd=GDP_sach_qpd[as.character(index(GDP_sach_qpd)) %in% 
                                               dates])
  # - CPI
  CPI_tsa_qpd <- gertrude_timeseries('AUC10000001.1S0', end_date=end_date)
  macro_D <- data.frame(macro_D, 
                        CPI_qpd=CPI_tsa_qpd[as.character(index(CPI_tsa_qpd)) %in% dates])
  # - Cash rate (me = month end)
  CR_me <- gertrude_timeseries('AUM80000100.000')
  CR_qe <- CR_me[substr(index(CR_me), 6, 7) %in% c('03', '06', '09', '12')]
  CR_qpd <- 100*(as.numeric(tail(CR_qe, -1))/as.numeric(head(CR_qe, -1)) - 1)
  macro_D <- data.frame(macro_D, 
                        CR_qe=as.numeric(CR_qe)[as.character(index(CR_qe)) %in% dates], 
                        CR_qpd=CR_qpd[tail(as.character(index(CR_qe)) %in% dates, -1)])
  # - House prices
  housepr_qpd <- gertrude_timeseries('AUP30000000.100', end_date=end_date)
  macro_D <- data.frame(macro_D, 
                        housepr_qpd=housepr_qpd[as.character(index(housepr_qpd)) %in% 
                                                  dates])
  rownames(macro_D) <- NULL
  macro_D
}

# Fetch the macro controls for every sample date up to and including
#   2019-12-31 (that date must be present in `dates`).
macro_D <- read_macro_vars_F(dates, '2019-12-31')


############### CANSTAR DATA
read_canstar_data_F <- function() {
  ints <- read.csv('CANSTAR_data.csv')
  # Add (end-month) day to date
  ints$Date <- paste0(ints$Date, 
                      ifelse(substr(ints$Date, 6, 7) %in% c('06', '09'), '-30', '-31'))
  colnames(ints)[1:2] <- c('period', 'bank')
  ints
}

# Load the CANSTAR data, keyed by 'period' and 'bank' in its first two columns.
ints_D <- read_canstar_data_F()

# Merge each of the datasets

merge_data_F <- function(saf, mac, ints) {
  #!!! Relies on macro_D first col being 'period'
  y <- saf %>% full_join(ints, by=c('period', 'bank')) %>% 
    left_join(mac, by=c('period')) %>%
    arrange(period, bank)
  y
}

regression_data <- merge_data_F(saffi_D, macro_D, ints_D)

regression_data <- regression_data[regression_data$bank %in% sample_banks, ]



# Check that no date-id combinations are repeated. Should give TRUE.
sum(duplicated(paste(regression_data$period, regression_data$bank))) == 0

# Drop all observations after Sep 2019
regression_data <- 
  regression_data[as.numeric(substr(regression_data$period, 1, 4)) <= 2019 &
                  substr(regression_data$period, 1, 7) != '2019-12', ]
# Adjust for the single-bank shift in investor credit in Dec 2014

outlier_F <- function(data) {
  outlier_bank <- "REMOVED"
  shift_amt <- data$Outs_Occ_ClosBal[data$bank == outlier_bank & data$period == '2014-09-30'] - 
    data$Outs_Occ_ClosBal[data$bank == outlier_bank & data$period == '2014-12-31']
  shift_prds <- unique(data$period[as.numeric(substr(data$period, 1, 4)) < 2015 & 
                                     data$period != '2014-12-31'])
  data$Outs_Inv_ClosBal[data$bank == outlier_bank & data$period %in% shift_prds] <- 
    data$Outs_Inv_ClosBal[data$bank == outlier_bank & data$period %in% shift_prds] + shift_amt
  return(data)
}
# Apply the Dec 2014 investor-credit adjustment to the merged panel.
regression_data <- outlier_F(regression_data)




# Compute market concentration measures and add to macro variables
market_conc_F <- function(data) {
  periods <- sort(unique(data$period))
  majors <- c('ANZ', 'CBA', 'NAB', 'WBC')
  hh_1d_F <- function(date, data, vars) {
    hh <- data[data$period == date, vars]
    hh <- apply(hh, 2, function(x) x/sum(x, na.rm=TRUE))
    hh <- colSums(hh^2, na.rm=TRUE)
    return(hh)
  }
  vars <- c('Outs_Tot_ClosBal', 'Outs_Inv_ClosBal', 'Outs_Occ_ClosBal', 
            'Net_Int_Inc_Tot', 'Int_Inc_Hous_Loans')
  HH <- data.frame(period=periods, 
                       do.call(rbind, lapply(periods, hh_1d_F, data, vars)))
  colnames(HH) <- c('period', 'hh_TotMort', 'hh_InvMort', 'hh_OccMort', 'hh_NII', 
                    'hh_Mortintinc')
  maj_share_F <- function(date, data, vars) {
    ms <- data[data$period == date, c('bank', vars)]
    ms <- colSums(ms[ms$bank %in% majors, -1], na.rm=TRUE)/
      colSums(ms[, -1], na.rm=TRUE)
  return(ms)
  }
  majshare <- 
    data.frame(period=periods,
               do.call(rbind, lapply(periods, maj_share_F, data, vars)))
  colnames(majshare) <- c('period', 'majsh_TotMort', 'majsh_InvMort', 
                          'majsh_OccMort', 'majsh_NII', 'majsh_Mortintinc')
  data <- data.frame(data, 
                     HH[match(data$period, HH$period), -1],
                     majshare[match(data$period, majshare$period), -1])
  return(data)
}

# Append the concentration measures (hh_* and majsh_* columns) to the panel.
regression_data <- market_conc_F(regression_data)


# Add the aggregate credit growth variables
agg_growth_F <- function(data) {
  gr <- read.csv('credit_aggs_gr_from_rba_website.csv')
  data <- data.frame(data, 
                     gr[match(data$period, gr$period), -1])
  return(data)
}

# Append the aggregate credit growth series read from
#   'credit_aggs_gr_from_rba_website.csv'.
regression_data <- agg_growth_F(regression_data)



# Save the final regression dataset to the working directory, without row names.
write.csv(regression_data, 'data_for_macropru_analysis.csv', 
          row.names=FALSE)









